R Markdown

Has the COVID-19 pandemic influenced antidepressant prescribing patterns during the winteseason (September-October) across Scottish health boards?

Winter season

scotishcensusgov

#Join the data boards

I loaded september to october data boards from 2017 - 2023 to represent the freshers season then I merged the health boards #I summarise total antidepressant prescriptions per Freshers year and plotted the graph to see the trend #Looked at pre coviid , during covid and after covid trend to see if there has been any impact or association

library(tidyverse)
library(here) # directory stucture
library(gt) # tables
library(janitor) # cleaning data
library(ggplot2) # plotting graph
library(sf) # to read in map data 
library(readxl) # to read in map data
library(plotly) # to make interactive
library(viridis)
library(sf)

loading a large amount of data in a shorter time period by downloading and using the mapdfr function (data from 2017-2023)

files <- list.files(here("data", "winter_data"), pattern = "csv")
winter_data <- files %>% 
  map_dfr(~read_csv(here("data", "winter_data", .))) %>% 
clean_names()

clean up data and filter for the sections you want

filtered_winter_data <- winter_data %>% 
filter(str_starts(bnf_item_code,"0403")) %>%  #antidepressant code is 0403
  mutate(year = as.numeric(substr(paid_date_month,1,4)), month = as.numeric(substr(paid_date_month,5,6))) %>% #separates the date into years and month so that i can group winter sections
  mutate(winter_year=case_when(month == 12 ~ year + 1, 
month %in% c(1,2) ~ year) )#makes a new column to group the winter years 

filtered_winter_data <- filtered_winter_data %>% 
  unite("healthboards",hbt2014,hbt,sep = "_")#so some of my data healthboard codes were under the name hbt_2014 AND another was hbt so i had to merge the column so all the healthboard columns fall under one

  filtered_winter_data$healthboards <- gsub("[NA]","",filtered_winter_data$healthboards) 
    filtered_winter_data$healthboards <-
      gsub("_","",filtered_winter_data$healthboards)#had to remove some NA characters and '_' characters

Graph 1

winter_years_data <- filtered_winter_data %>% 
  group_by(winter_year) %>% 
  summarise(total_items=sum(number_of_paid_items,na.rm = TRUE))

plot <- ggplot(winter_years_data,aes(x=winter_year,y=total_items)) +
  geom_line(linewidth=0.7,colour = "blue") +
  geom_point(size=2)+
  scale_x_continuous(breaks=2017:2023) +
  labs(title="Antidepressant Prescriptions During Winter Season",x="Year",y="Total Antidepressant Prescriptions") +
  theme_minimal()
  
ggplotly(plot)
print(plot)

#write a code talking about the zoomed in changes and reference why you dudnt go from 0
#ROUGH
population <- readxl::read_excel(here("data","population.xlsx"), skip=10) %>% 
  clean_names() %>% 
  select(x2,all_people) %>% 
  filter(!is.na(all_people)) %>% 
  rename(h_bname = x2)

filtered2_winter_data <- filtered_winter_data %>% 
  group_by(healthboards,bnf_item_code,winter_year,gp_practice) %>% summarise(total_paid = sum(paid_quantity, na.rm =TRUE))
SIMD <- readxl::read_excel(here("data","SIMD.xlsx")) %>% 
  clean_names() # loading excel data 
filtered_SIMD <- SIMD %>% 
  group_by(simd2020v2_quintile,h_bcode,h_bname) %>% 
  summarise()

filtered_SIMD <- filtered_SIMD %>% 
  rename(healthboards = h_bcode)

Overall_SIMD_winter <- filtered2_winter_data %>% 
  full_join(filtered_SIMD,by = "healthboards")
'relationship = "many-to-many"'
## [1] "relationship = \"many-to-many\""
Overall_SIMD_population_winter <- Overall_SIMD_winter %>% 
  full_join(population,by="h_bname")
antidepressant_per_head <- Overall_SIMD_population_winter %>% 
  group_by(healthboards,h_bname,winter_year) %>% 
  summarise(quantity_per_head = sum(total_paid)/mean(all_people))

### map 
NHS_healthboards <- st_read(here( "data", "Week6_NHS_HealthBoards_2019.shp")) %>% 
clean_names() %>% 
  rename(h_bname = hb_name)
## Reading layer `Week6_NHS_healthboards_2019' from data source 
##   `/Users/olufimihanfaturoti/Year 3 Medicine/data-science/B251495/data/Week6_NHS_healthboards_2019.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 14 features and 4 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: 7564.996 ymin: 530635.8 xmax: 468754.8 ymax: 1218625
## Projected CRS: OSGB36 / British National Grid
# Join spatial data with falls_admissions_75_summary
mapped_data <- antidepressant_per_head %>%
  full_join(NHS_healthboards,by="h_bname") %>% 
  st_as_sf()


#CLAUDE : 
  library(ggiraph)

plot_map <- mapped_data %>% 
  ggplot() + 
  geom_sf_interactive(  # Changed from geom_sf
    aes(fill = quantity_per_head,
        tooltip = paste0(h_bname, 
                        "\nWinter Year: ", winter_year,
                        "\nQuantity per Head: ", round(quantity_per_head, 2))),
    colour = "white", 
    size = 0.1
  ) + 
  scale_fill_distiller(palette = "Blues", direction = 1, 
                       name = "Items per Head") +
  labs(
    title = "Antidepressant Prescriptions per Head",
    subtitle = "By Health Board and Winter Year"
  ) +
  facet_wrap(~ winter_year) +
  theme_void() +
  theme(
    strip.text = element_text(size = 12, face = "bold"),
    plot.title = element_text(face = "bold", size = 16),
    plot.subtitle = element_text(size = 10)
  )

interactive_map <- girafe(ggobj = plot_map)  # Changed from ggplotly
interactive_map

TRY PLOT BOX BY 1000 INSTEAD OF

files <- list.files(here("data", "winter_population"), pattern = "csv")
winter_population <- files %>% 
  map_dfr(~read_csv(here("data", "winter_population", .))) %>% 
clean_names()

filtered_winter_population <- winter_population %>% 
  select(date,practice_code,hb,all_ages) %>% 
  mutate(year = as.numeric(substr(date,1,4))) %>% 
  rename(healthboards = hb) %>% 
rename(winter_year = year)

  filtered_winter_population$winter_year <- gsub("2020","2019",filtered_winter_population$winter_year) 
  
  filtered_winter_population$winter_year <- gsub("2023","2022",filtered_winter_population$winter_year) 
  
summary_winter_with_simd <- Overall_SIMD_winter %>% 
  filter(winter_year %in% c("2019","2022")) %>% 
  rename(practice_code = gp_practice) %>% 
  rename(winter_year = winter_year)

#round 2
try_filtered_data <- filtered2_winter_data %>% 
  filter(winter_year %in% c("2019","2022")) %>% 
    rename(practice_code = gp_practice) %>% 
  rename(winter_year = winter_year)

filtered_winter_population$winter_year <- as.numeric(filtered_winter_population$winter_year)
try_filtered_data$winter_year <- as.numeric(try_filtered_data$winter_year)

boxplot2.0 <- left_join(filtered_winter_population,try_filtered_data, by=c("healthboards","practice_code","winter_year")) 

boxplot2.5 <- left_join(boxplot2.0,filtered_SIMD, by=c("healthboards"))


boxplot3 <- boxplot2.5 %>%   
group_by(healthboards,practice_code,all_ages,winter_year,simd2020v2_quintile,h_bname) %>%
  summarise(avg_paid_over_winters = mean(total_paid, na.rm = TRUE))  %>% 
  mutate(avg_per_1000=(avg_paid_over_winters/all_ages)*1000,
         year_label=factor(winter_year,
                           level = c(2019,2022),
                           labels = c("Pre-COVID (2019)","Post-COVID (2022)")))

BOX PLOT FINAL

hb_points <- boxplot3 %>% 
  filter(!is.na(avg_per_1000)) %>%
  group_by(simd2020v2_quintile, h_bname, winter_year) %>% 
  summarise(hb_avg = mean(avg_per_1000, na.rm = TRUE), .groups = "drop") %>% 
  mutate(
    year_label = factor(
      winter_year,
      levels = c(2019, 2022),
      labels = c("Pre-COVID (2019)", "Post-COVID (2022)")) )

# Plot with fixed legend
box <- ggplot(boxplot3 %>% filter(!is.na(avg_per_1000)), 
              aes(x = factor(simd2020v2_quintile), y = avg_per_1000)) +
  
  geom_boxplot(aes(fill = factor(simd2020v2_quintile)),
               outlier.shape = NA,
               colour = "black",
               alpha = 0.5) +
  
  stat_summary(fun = median, geom = "crossbar",
               width = 0.5, color = "black", linewidth = 0.6, fatten = 1) +
  
  geom_point(data = hb_points,
             aes(y = hb_avg, color = h_bname,
                 text = paste0("Health Board: ", h_bname, "<br>",
                               "SIMD: ", simd2020v2_quintile, "<br>",
                               "Avg per 1000: ", round(hb_avg, 1))),
             position = position_jitter(width = 0.2, seed = 42),
             size = 2,
             alpha = 0.8) +
  
  scale_fill_viridis_d(option = "C", alpha = 0.4, guide = "none") +  # Hide SIMD legend
  scale_color_viridis_d(option = "D", name = "Health Board") +  # Clean health board legend
  
  facet_wrap(~year_label) +
  coord_cartesian(ylim = c(0, 2000)) +
  
  theme_light() +
  theme(
    panel.grid.major.y = element_blank(),
    panel.border = element_blank(),
    axis.ticks.y = element_blank(),
    legend.position = "right",
    legend.text = element_text(size = 9)
  ) +
  labs(
    title = "Average Winter Antidepressant Prescriptions by SIMD Quintile",
    x = "SIMD Quintile (1 = Most Deprived)",
    y = "Avg prescriptions per 1000"
  )

ggplotly(box, tooltip = "text")

ridge lines

box <- ggplot(boxplot3,
       aes(x=factor(simd2020v2_quintile),
           y=avg_per_1000,
           fill=factor(simd2020v2_quintile))) +
  geom_boxplot(outlier.shape = 21,
               outlier.size = 1,
               outlier.stroke = 0.5,
               linewidth = 0.8,
               colour = "black",
               alpha = 0.7) +
  
  # HEALTH BOARD POINTS (with tooltip)
  geom_point(data = hb_points,
    aes(x = factor(simd2020v2_quintile),
      y = hb_avg,
      text = paste0( "Health Board: ", h_bname, "<br>",
        "SIMD: ", simd2020v2_quintile, "<br>",
        "Avg per 1000: ", round(hb_avg, 1), "<br>" )),
    color = "red",
    size = 2,
    alpha = 0.7,
    position = position_jitter(width = 0.2)) +
  scale_y_continuous(limits = c(0, 2000),
                     labels = scales::label_number()) +
  scale_fill_viridis(discrete = TRUE, alpha = 0.9) +
  theme(panel.background = element_rect(fill = "lightblue",
                                    colour = "lightblue",
                                    size = 0.5)) +
  labs( title = "Average Winter Antidepressant Prescriptions per Practice by SIMD Quintile",
    x = "SIMD Quintile (1 = Most Deprived)",
    y = "Avg prescriptions per practice (per 1000)") +
   facet_wrap(~ year_label)

ggplotly(box, tooltip = "text")

NEW PERCENTAGE CHANGE

lolipop_data <- boxplot2.5 %>% 
  select(h_bname, total_paid, simd2020v2_quintile, winter_year) %>% 
  mutate(period = ifelse(winter_year == 2019, "pre", "post"))
# Step 2: Aggregate by Health Board + SIMD + period
lolipop_summary <- lolipop_data %>%
  group_by(h_bname, simd2020v2_quintile, period) %>%
  summarise(total_prescribing = sum(total_paid, na.rm = TRUE)) %>%
# Step 3: Pivot wider: pre vs post
  pivot_wider(names_from = period,
    values_from = total_prescribing) %>%
# Step 4: Calculate percentage change
  mutate(pct_change = ((post - pre) / pre) * 100) %>%
# Step 5: Rank / reorder by SIMD (1 = most deprived)
  arrange(simd2020v2_quintile) %>%
  mutate(h_bname = factor(h_bname, levels = unique(h_bname)))
#plot
 pct_lollipop <- ggplot(lolipop_summary, aes(x = h_bname, y = pct_change)) +
  geom_segment(aes(x = h_bname, xend = h_bname, y = 0, yend = pct_change), 
               color = "skyblue") +
  geom_point(color = "blue", size = 4, alpha = 0.6) +
  theme_light() +
  coord_flip() +
  theme(
    panel.grid.major.y = element_blank(),
    panel.border = element_blank(),
    axis.ticks.y = element_blank()) +
  labs( title = "Percentage Change in Prescribing per Health Board",
    x = "Health Board",
    y = "Percentage Change (%)")
 
   ggplotly(pct_lollipop)

questions : not sure the best way to displa my original data ? github - how to get rid of the signs 1- overall national trend (use original graph) 2- i want to show the variation between different regions using healthboards 3- link it to deprevation and look at prescriptions per person 4- can i do a map that shows pre covid and post covid side by side would that count as one 5- voilin plot across differet SIMDs to compare smaller unit of data - gp practice (postcode that links to SIMD) PATCHWORK - MAPS TOGETHETE reference line to show the split between precovid, covid and postcovid

every dot is a gp practice - gp practice - dataset - adressess (assessment prep) voilin plot if messy add transperency open data use quintiles for voilin plot do a code that says if not installed install and load packages

percentage increase

overall trend map antidepressant prescribing per head by healthboard facet by winter year? boxplot by SIMD dumbell plot / lollipop graph - percentage change write where the links are from